library(dplyr)
library(haven)
library(writexl)
library(readxl)

# Load the input tables as plain data frames. The two Excel workbooks are read
# with readxl. read_csv() is called through the readr namespace because none of
# the packages attached at the top of this script (dplyr, haven, writexl,
# readxl) exports it, so the unqualified call only works when readr happens to
# be attached by the session.
df <- as.data.frame(read_xlsx("variables_dependientes_mun.xlsx"))
df2 <- as.data.frame(read_xlsx("diconsa.xlsx"))
encig <- as.data.frame(readr::read_csv("encig2021.csv"))
envipe <- as.data.frame(readr::read_csv("per_vic.csv"))
becas0 <- as.data.frame(readr::read_csv("becas0.csv"))
becas1 <- as.data.frame(readr::read_csv("becas1.csv"))
becas2 <- as.data.frame(readr::read_csv("becas2.csv"))
becas3 <- as.data.frame(readr::read_csv("becas3.csv"))
pob <- as.data.frame(readr::read_csv("c20N.csv"))


# Convert every character column of df to numeric; text that cannot be parsed
# as a number becomes NA (with a coercion warning). across(where(...)) is the
# current replacement for the superseded mutate_if().
df <- df %>% mutate(across(where(is.character), as.numeric))

# Collapse df2 to one row per (ent, mun):
#   ti_dico          - number of df2 rows in the group
#   POB_TOT_2010_sum - sum of POB_TOT_2010, ignoring missing values
#   td_phab          - POB_TOT_2010_sum divided by ti_dico
# NOTE(review): td_phab is population per df2 row, not rows per inhabitant --
# confirm this is the intended direction of the ratio.
df2_grouped <- df2 %>%
  group_by(ent, mun) %>%
  summarise(
    ti_dico = n(),
    POB_TOT_2010_sum = sum(POB_TOT_2010, na.rm = TRUE),
    td_phab = POB_TOT_2010_sum / ti_dico
  ) %>%
  ungroup()

# Collapse a data frame to one row per combination of key columns.
#
# Arguments:
#   df      - data frame holding the key columns plus the columns to average.
#   id_vars - character vector naming the key columns to group by.
#
# Returns an ungrouped table with the key columns followed by the mean of every
# other column, computed with missing values removed (a group whose values are
# all missing yields NaN).
reduce_dataframe <- function(df, id_vars) {
  by_keys <- group_by(df, across(all_of(id_vars)))

  summarise(
    by_keys,
    across(everything(), function(col) mean(col, na.rm = TRUE)),
    .groups = "drop"
  )
}

# Identifier columns (ent and mun) shared by the tables joined below.
id_vars <- c("ent", "mun")

# Average every non-key column of df within each (ent, mun) combination.
reduced_df <- reduce_dataframe(df, id_vars)

# Attach the df2_grouped measures to the averaged table. Both sides first get
# their keys rewritten as integer-valued strings -- the same normalisation this
# script applies before each of its other joins -- so the match does not depend
# on how the spreadsheet reader typed ent and mun (a character/numeric mismatch
# would otherwise make left_join() fail).
result_df <- reduced_df %>%
  mutate(across(all_of(id_vars), ~ as.character(as.integer(.x)))) %>%
  left_join(
    df2_grouped %>%
      mutate(across(all_of(id_vars), ~ as.character(as.integer(.x)))),
    by = id_vars
  )

# Regress tallaedad on td_phab. na.omit(result_df) drops every row that has a
# missing value in any column, not only in the two model variables.
reg_dico <- lm(tallaedad ~ td_phab, data = na.omit(result_df))
summary(reg_dico)

## Corruption
# Recode P8_1 and P8_2 in one pass: code 9 becomes missing, code 2 becomes 0,
# and every other value is kept as is.
encig <- encig %>%
  mutate(
    across(
      c(P8_1, P8_2),
      ~ ifelse(.x == 9, NA, ifelse(.x == 2, 0, .x))
    )
  )

# Average both recoded items per (ent, mun), build in_cor as the mean of the two
# averages, and rewrite the keys as integer-valued strings for joining.
# .groups = "drop" returns an ungrouped table; without it summarise() leaves the
# result grouped by ent, and the key rewrite below would then be modifying a
# grouping column.
encig_grouped <- encig %>%
  group_by(ent, mun) %>%
  summarise(
    P8_1_mean = mean(P8_1, na.rm = TRUE),
    P8_2_mean = mean(P8_2, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    in_cor = (P8_1_mean + P8_2_mean) / 2,
    ent = as.character(as.integer(ent)),
    mun = as.character(as.integer(mun))
  )

# Rewrite the keys of result_df as integer-valued strings so they have the same
# form as the keys of encig_grouped, then attach the encig measures.
result_df <- result_df %>%
  mutate(across(c(ent, mun), ~ as.character(as.integer(.x))))

final_df <- left_join(result_df, encig_grouped, by = c("ent", "mun"))

# All three models are fitted on na.omit(final_df), i.e. on the rows with no
# missing value in any column, so they share one estimation sample.

# tallaedad on in_cor alone.
reg_cor <- lm(tallaedad ~ in_cor, data = na.omit(final_df))
summary(reg_cor)

# tallaedad on in_cor and td_phab (additive).
reg_cordic <- lm(tallaedad ~ in_cor + td_phab, data = na.omit(final_df))
summary(reg_cordic)

# tallaedad on in_cor, td_phab and their interaction.
reg_cordicint <- lm(tallaedad ~ in_cor * td_phab, data = na.omit(final_df))
summary(reg_cordicint)

## INSECURITY AND VIOLENCE

# Recode the four envipe items in one pass: code 9 becomes missing, code 2
# becomes 0, and every other value is kept as is.
envipe <- envipe %>%
  mutate(
    across(
      c(AP4_3_1, AP4_4_01, AP4_4_05, AP4_8_4),
      ~ ifelse(.x == 9, NA, ifelse(.x == 2, 0, .x))
    )
  )

# Average the recoded items per (ent, mun). The first three averages get a
# _mean suffix and are combined into in_ins (their simple mean); the AP4_8_4
# average keeps the original column name because the regression on it refers to
# AP4_8_4. Keys are rewritten as integer-valued strings for joining.
# .groups = "drop" returns an ungrouped table; without it summarise() leaves the
# result grouped by ent, and the key rewrite below would then be modifying a
# grouping column.
envipe_grouped <- envipe %>%
  group_by(ent, mun) %>%
  summarise(
    AP4_3_1_mean = mean(AP4_3_1, na.rm = TRUE),
    AP4_4_01_mean = mean(AP4_4_01, na.rm = TRUE),
    AP4_4_05_mean = mean(AP4_4_05, na.rm = TRUE),
    AP4_8_4 = mean(AP4_8_4, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    in_ins = (AP4_3_1_mean + AP4_4_01_mean + AP4_4_05_mean) / 3,
    ent = as.character(as.integer(ent)),
    mun = as.character(as.integer(mun))
  )

# Rewrite the keys of final_df as integer-valued strings (a no-op for keys that
# are already in that form) and attach the envipe measures.
final_df <- final_df %>%
  mutate(across(c(ent, mun), ~ as.character(as.integer(.x))))

final2_df <- left_join(final_df, envipe_grouped, by = c("ent", "mun"))

# Both models use na.omit(final2_df): rows with a missing value in any column
# are excluded.

# tallaedad on the in_ins index.
reg_ins <- lm(tallaedad ~ in_ins, data = na.omit(final2_df))
summary(reg_ins)

# tallaedad on the (ent, mun) average of AP4_8_4.
reg_vio <- lm(tallaedad ~ AP4_8_4, data = na.omit(final2_df))
summary(reg_vio)

## Becas (scholarships)

# Store ent and mun as text in each of the four becas tables so that their key
# columns have the same type when the tables are stacked.
becas0 <- mutate(becas0, across(c(ent, mun), as.character))
becas1 <- mutate(becas1, across(c(ent, mun), as.character))
becas2 <- mutate(becas2, across(c(ent, mun), as.character))
becas3 <- mutate(becas3, across(c(ent, mun), as.character))

# Stack the four tables and keep only the last three characters of mun.
becas <- bind_rows(becas0, becas1, becas2, becas3) %>%
  mutate(mun = substr(mun, nchar(mun) - 2, nchar(mun)))

# Number of becas rows for each (ent, mun) combination.
becas_grouped <- becas %>%
  group_by(ent, mun) %>%
  summarise(becas_netas = n()) %>%
  ungroup()

# Keep the keys and the count, then rewrite the keys as integer-valued strings
# (text -> integer -> text) so they match the population table.
result_becas <- becas_grouped %>%
  select(ent, mun, becas_netas) %>%
  mutate(
    across(c(ent, mun), ~ as.character(as.integer(as.character(.x))))
  )

# Total POBTOT for each (ent, mun) combination, with the keys rewritten the
# same way.
# NOTE(review): if pob holds aggregate rows alongside their component rows,
# this sum counts population more than once -- confirm against the file layout.
pob_summarized <- pob %>%
  group_by(ent, mun) %>%
  summarise(pob = sum(POBTOT, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(across(c(ent, mun), ~ as.character(as.integer(.x))))

# Print the first rows of both tables to eyeball the ent and mun values.
head(result_becas)
head(pob_summarized)

# Attach pob to the becas counts and compute rbecas, the count expressed as a
# percentage of pob.
final3_df <- result_becas %>%
  left_join(pob_summarized, by = c("ent", "mun")) %>%
  mutate(rbecas = becas_netas / pob * 100)

# Rewrite the keys of reduced_df as integer-valued strings so they match
# final3_df, then attach the averaged columns.
reduced_df <- reduced_df %>%
  mutate(across(c(ent, mun), ~ as.character(as.integer(.x))))

final4_df <- left_join(final3_df, reduced_df, by = c("ent", "mun"))

# tallaedad on rbecas, using only rows with no missing value in any column.
reg_bec <- lm(tallaedad ~ rbecas, data = na.omit(final4_df))
summary(reg_bec)

